Implement transformation application

This commit is contained in:
2025-06-24 15:53:20 +02:00
parent 8cc9a64111
commit 284723c978
11 changed files with 134 additions and 111 deletions

View File

@@ -4,5 +4,5 @@ import java.util.*
interface SelectorDefinition {
fun steps(): Set<Step>
fun transform(): Optional<Set<TransformationStep>>
fun transform(): Optional<List<TransformationStep>>
}

View File

@@ -6,5 +6,5 @@ import io.smallrye.config.WithConverter
interface TransformationStep {
fun name(): String
@WithConverter(EmptyStringConverter::class)
fun parameters(): MutableList<String?>
fun parameters(): MutableList<String>
}

View File

@@ -1,8 +1,7 @@
package com.rak.controller
import com.rak.config.SourcesConfiguration
import com.rak.model.RegionalSet
import com.rak.service.ScrapeService
import com.rak.service.SourceService
import jakarta.ws.rs.Consumes
import jakarta.ws.rs.GET
import jakarta.ws.rs.Path
@@ -13,15 +12,11 @@ import org.jboss.resteasy.reactive.RestQuery
@Path("/api")
class ExampleResource(
private val sourcesConfiguration: SourcesConfiguration,
class ScrapeController(
private val scrapeService: ScrapeService,
private val sourceService: SourceService
) {
companion object {
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
}
@GET
@Path("/{provider}/set")
@@ -32,8 +27,8 @@ class ExampleResource(
provider: String,
@RestQuery
setName: String
): List<Map<String, String>> {
return scrapeService.extractSet(
): List<RegionalSet> {
return scrapeService.scrapeSet(
provider,
setName
)
@@ -49,7 +44,7 @@ class ExampleResource(
@RestQuery
cardName: String
): Map<String, String> {
return scrapeService.extractCard(
return scrapeService.scrapeCard(
provider,
cardName
)

View File

@@ -0,0 +1,7 @@
package com.rak.model
/**
 * Immutable value object describing one regional variant of a set.
 *
 * @property id identifier of the set
 * @property language language of this regional variant
 * @property key region key of this regional variant
 */
data class RegionalSet(
    val id: String,
    val language: String,
    val key: String,
)

View File

@@ -2,5 +2,5 @@ package com.rak.model.transform
@FunctionalInterface
fun interface ParameterizedTransformation : AbstractTransformation {
fun apply(input: String, vararg parameters: String): Any?
fun apply(input: String, parameters: List<String>): String
}

View File

@@ -2,5 +2,5 @@ package com.rak.model.transform
@FunctionalInterface
fun interface Transformation : AbstractTransformation {
fun apply(input: String): String?
fun apply(input: String): String
}

View File

@@ -1,5 +1,6 @@
package com.rak.model.transform
import com.rak.config.TransformationStep
import java.util.concurrent.ConcurrentHashMap
class TransformationRegistry {
@@ -33,17 +34,19 @@ class TransformationRegistry {
parameterizedTransformation.put(name, transformation)
}
fun getTransformation(name: String, parameters: List<String>? = null): AbstractTransformation {
fun getTransformation(transformationStep: TransformationStep): AbstractTransformation {
val name = transformationStep.name()
val parameters = transformationStep.parameters()
return when {
transformations.containsKey(name) -> {
if (!parameters.isNullOrEmpty()) {
if (parameters.isNotEmpty()) {
throw IllegalArgumentException("'$name' doesn't accept parameters")
} else {
transformations[name]!!
}
}
parameterizedTransformation.containsKey(name) -> {
if (parameters.isNullOrEmpty()) {
if (parameters.isEmpty()) {
throw IllegalArgumentException("'$name' requires parameters")
} else {
parameterizedTransformation[name]!!
@@ -53,4 +56,21 @@ class TransformationRegistry {
}
}
/**
 * Runs [input] through every transformation named in [steps], in list order,
 * feeding the output of each step into the next one.
 *
 * Each step is resolved exactly once through [getTransformation]; the resolved
 * instance is then invoked directly instead of being looked up a second time.
 *
 * @param input the initial value
 * @param steps the steps to apply; an empty list returns [input] unchanged
 * @return the value produced by the last step
 * @throws IllegalArgumentException if [getTransformation] rejects a step, e.g.
 * parameters passed to a parameterless transformation or missing for a
 * parameterized one
 * @throws IllegalStateException if the resolved transformation is neither a
 * [Transformation] nor a [ParameterizedTransformation]
 */
fun applyTransformations(input: String, steps: List<TransformationStep>): String =
    steps.fold(input) { current, step ->
        when (val transformation = getTransformation(step)) {
            is Transformation -> transformation.apply(current)
            is ParameterizedTransformation -> transformation.apply(current, step.parameters())
            else -> throw IllegalStateException("Invalid transformation type")
        }
    }
}

View File

@@ -0,0 +1,73 @@
package com.rak.service
import com.rak.config.Step
import com.rak.model.RegionalSet
import com.rak.model.transform.TransformationRegistry
import com.rak.util.XPathUtil
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
/**
 * Extracts model objects from an already-fetched HTML [Document], driven by the
 * selector configuration of a provider.
 */
@ApplicationScoped
class ExtractionService(
    private val sourceService: SourceService,
) {

    private val transformationRegistry: TransformationRegistry = TransformationRegistry()

    /**
     * Extracts every regional set found in [document].
     *
     * The provider's regional-set root selector picks the root elements; for each
     * root the `id`, `language` and `key` values are read through their selector
     * steps. The id is additionally run through the id selector's transformation
     * steps when such steps are configured.
     *
     * @param document the parsed page to extract from
     * @param provider id of the source whose selector configuration is used
     * @return one [RegionalSet] per element matched by the root selector
     * @throws IllegalArgumentException if no source is registered for [provider]
     * @throws IllegalStateException if `id`, `language` or `key` cannot be
     * extracted from a matched root element
     */
    fun extractSet(document: Document, provider: String): List<RegionalSet> {
        val source = sourceService.getSourceById(provider)
            ?: throw IllegalArgumentException("Provider $provider not found")
        val regionalSetSelector = source.getItems().regionalSet().get()
        val regionalSetRoots = document.select(regionalSetSelector.rootSelector().value())

        return regionalSetRoots.map { root ->
            val idSelector = regionalSetSelector.idSelector()
            val rawId = extractRequiredText(root, idSelector.steps(), "id")
            // `transform` is optional on a selector: only apply it when it is
            // configured instead of failing on an empty Optional.
            val setId: String = idSelector.transform()
                .map { transformSteps -> transformationRegistry.applyTransformations(rawId, transformSteps) }
                .orElse(rawId)
            val setLanguage = extractRequiredText(root, regionalSetSelector.languageSelector().steps(), "language")
            val setKey = extractRequiredText(root, regionalSetSelector.regionKeySelector().steps(), "key")

            RegionalSet(setId, setLanguage, setKey)
        }
    }

    /** Extracts a mandatory value, failing with the parameter's name when it is absent. */
    private fun extractRequiredText(root: Element, steps: Set<Step>, parameter: String): String =
        extractTextFromRootBySteps(root, steps)
            ?: throw IllegalStateException("Parameter '$parameter' could not be found")

    /**
     * Walks [steps] in the set's iteration order, starting from a clone of [root].
     *
     * Every step except the last one moves to the next element via
     * [XPathUtil.getNextElement]; the last step produces the result via
     * [XPathUtil.extractResult].
     *
     * @return the value produced by the last step, or `null` if [steps] is empty
     * or the last step produced no value
     * @throws IllegalStateException if an earlier step left no element to
     * continue from
     */
    private fun extractTextFromRootBySteps(
        root: Element,
        steps: Set<Step>,
    ): String? {
        var currentElement: Element? = root.clone()
        var result: String? = null
        val lastIndex = steps.size - 1

        // Single pass over the set; indexed access on a Set would rescan it for every step.
        for ((index, step) in steps.withIndex()) {
            val element = currentElement
                ?: throw IllegalStateException("Selector step $index has no element to evaluate against")
            if (index == lastIndex) {
                result = XPathUtil.extractResult(element, step.value())
            } else {
                currentElement = XPathUtil.getNextElement(element, step.value())
            }
        }
        return result
    }
}

View File

@@ -1,29 +0,0 @@
package com.rak.service
import jakarta.ws.rs.GET
import jakarta.ws.rs.Path
import jakarta.ws.rs.QueryParam
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
/**
 * REST client interface registered against the base URI
 * `https://stage.code.quarkus.io/api`.
 *
 * Obtain an instance through injection:
 *
 * ```kotlin
 * @Inject
 * @RestClient
 * lateinit var myRemoteService: MyRemoteService
 *
 * fun doSomething() {
 *     val restClientExtensions = myRemoteService.getExtensionsById("io.quarkus:quarkus-rest-client")
 * }
 * ```
 */
@RegisterRestClient(baseUri = "https://stage.code.quarkus.io/api")
interface MyRemoteService {

    /**
     * Issues a `GET` on `/extensions`, passing [id] as the `id` query parameter.
     *
     * @return the extensions returned by the remote endpoint
     */
    @GET
    @Path("/extensions")
    fun getExtensionsById(@QueryParam("id") id: String): Set<Extension>

    /** Payload element returned by [getExtensionsById]. */
    data class Extension(
        val id: String,
        val name: String,
        val shortName: String,
        val keywords: List<String>,
    )
}

View File

@@ -1,86 +1,30 @@
package com.rak.service
import com.rak.config.RegionalSetDefinition
import com.rak.config.SourcesConfiguration
import com.rak.config.Step
import com.rak.util.XPathUtil
import com.rak.model.RegionalSet
import jakarta.enterprise.context.ApplicationScoped
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
@ApplicationScoped
class ScrapeService(
private val sourceService: SourceService
private val sourceService: SourceService,
private val extractionService: ExtractionService,
) {
companion object {
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
}
private fun extractTextFromRootBySteps(
root: Element,
steps: Set<Step>
): String? {
var currentElement: Element? = root.clone()
var result: String? = null
for (index in 0 until steps.size) {
val currentStep = steps.elementAtOrNull(index) ?: return null
if (currentElement == null) {
throw IllegalStateException()
}
if (index == steps.size - 1) {
result = XPathUtil.extractResult(currentElement, currentStep.value())
}
else {
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value())
}
}
return result
}
fun extractSet(
fun scrapeSet(
provider: String,
setName: String,
): List<Map<String, String>> {
val source =
sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
): List<RegionalSet> {
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
val path: String = normalizePath(setName)
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
val regionalSetSelector = source.getItems().regionalSet().get()
val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value())
return regionalSetRoot.map {
val setId: String? = extractTextFromRootBySteps(
it,
regionalSetSelector.idSelector().steps()
)
val setLanguage: String? = extractTextFromRootBySteps(
it,
regionalSetSelector.languageSelector().steps()
)
val setKey: String? = extractTextFromRootBySteps(
it,
regionalSetSelector.regionKeySelector().steps()
)
mapOf(
Pair("id", setId ?: "N/A"),
Pair("language", setLanguage ?: "N/A"),
Pair("key", setKey ?: "N/A"),
)
}
return extractionService.extractSet(document, provider)
}
fun extractCard(
fun scrapeCard(
provider: String,
cardName: String,
): Map<String, String> {

View File

@@ -0,0 +1,13 @@
package com.rak.service
import com.rak.model.transform.TransformationRegistry
import jakarta.enterprise.context.ApplicationScoped
/**
 * Application-scoped service that holds a [TransformationRegistry].
 *
 * The registry defaults to a newly constructed instance when none is passed to
 * the constructor. The class declares no operations of its own yet.
 */
@ApplicationScoped
class TransformService(
    private val transformationRegistry: TransformationRegistry = TransformationRegistry(),
)