Implement transformation application
This commit is contained in:
@@ -4,5 +4,5 @@ import java.util.*
|
|||||||
|
|
||||||
interface SelectorDefinition {
|
interface SelectorDefinition {
|
||||||
fun steps(): Set<Step>
|
fun steps(): Set<Step>
|
||||||
fun transform(): Optional<Set<TransformationStep>>
|
fun transform(): Optional<List<TransformationStep>>
|
||||||
}
|
}
|
||||||
@@ -6,5 +6,5 @@ import io.smallrye.config.WithConverter
|
|||||||
interface TransformationStep {
|
interface TransformationStep {
|
||||||
fun name(): String
|
fun name(): String
|
||||||
@WithConverter(EmptyStringConverter::class)
|
@WithConverter(EmptyStringConverter::class)
|
||||||
fun parameters(): MutableList<String?>
|
fun parameters(): MutableList<String>
|
||||||
}
|
}
|
||||||
@@ -1,8 +1,7 @@
|
|||||||
package com.rak.controller
|
package com.rak.controller
|
||||||
|
|
||||||
import com.rak.config.SourcesConfiguration
|
import com.rak.model.RegionalSet
|
||||||
import com.rak.service.ScrapeService
|
import com.rak.service.ScrapeService
|
||||||
import com.rak.service.SourceService
|
|
||||||
import jakarta.ws.rs.Consumes
|
import jakarta.ws.rs.Consumes
|
||||||
import jakarta.ws.rs.GET
|
import jakarta.ws.rs.GET
|
||||||
import jakarta.ws.rs.Path
|
import jakarta.ws.rs.Path
|
||||||
@@ -13,15 +12,11 @@ import org.jboss.resteasy.reactive.RestQuery
|
|||||||
|
|
||||||
|
|
||||||
@Path("/api")
|
@Path("/api")
|
||||||
class ExampleResource(
|
class ScrapeController(
|
||||||
private val sourcesConfiguration: SourcesConfiguration,
|
|
||||||
private val scrapeService: ScrapeService,
|
private val scrapeService: ScrapeService,
|
||||||
private val sourceService: SourceService
|
|
||||||
) {
|
) {
|
||||||
|
|
||||||
companion object {
|
|
||||||
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
|
|
||||||
}
|
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
@Path("/{provider}/set")
|
@Path("/{provider}/set")
|
||||||
@@ -32,8 +27,8 @@ class ExampleResource(
|
|||||||
provider: String,
|
provider: String,
|
||||||
@RestQuery
|
@RestQuery
|
||||||
setName: String
|
setName: String
|
||||||
): List<Map<String, String>> {
|
): List<RegionalSet> {
|
||||||
return scrapeService.extractSet(
|
return scrapeService.scrapeSet(
|
||||||
provider,
|
provider,
|
||||||
setName
|
setName
|
||||||
)
|
)
|
||||||
@@ -49,7 +44,7 @@ class ExampleResource(
|
|||||||
@RestQuery
|
@RestQuery
|
||||||
cardName: String
|
cardName: String
|
||||||
): Map<String, String> {
|
): Map<String, String> {
|
||||||
return scrapeService.extractCard(
|
return scrapeService.scrapeCard(
|
||||||
provider,
|
provider,
|
||||||
cardName
|
cardName
|
||||||
)
|
)
|
||||||
7
src/main/kotlin/com/rak/model/RegionalSet.kt
Normal file
7
src/main/kotlin/com/rak/model/RegionalSet.kt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
package com.rak.model
|
||||||
|
|
||||||
|
/**
 * Immutable value holder for one regional set entry.
 *
 * @property id identifier of the set.
 * @property language language of the set.
 * @property key region key of the set.
 */
data class RegionalSet(val id: String, val language: String, val key: String)
|
||||||
@@ -2,5 +2,5 @@ package com.rak.model.transform
|
|||||||
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
fun interface ParameterizedTransformation : AbstractTransformation {
|
fun interface ParameterizedTransformation : AbstractTransformation {
|
||||||
fun apply(input: String, vararg parameters: String): Any?
|
fun apply(input: String, parameters: List<String>): String
|
||||||
}
|
}
|
||||||
@@ -2,5 +2,5 @@ package com.rak.model.transform
|
|||||||
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
fun interface Transformation : AbstractTransformation {
|
fun interface Transformation : AbstractTransformation {
|
||||||
fun apply(input: String): String?
|
fun apply(input: String): String
|
||||||
}
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
package com.rak.model.transform
|
package com.rak.model.transform
|
||||||
|
|
||||||
|
import com.rak.config.TransformationStep
|
||||||
import java.util.concurrent.ConcurrentHashMap
|
import java.util.concurrent.ConcurrentHashMap
|
||||||
|
|
||||||
class TransformationRegistry {
|
class TransformationRegistry {
|
||||||
@@ -33,17 +34,19 @@ class TransformationRegistry {
|
|||||||
parameterizedTransformation.put(name, transformation)
|
parameterizedTransformation.put(name, transformation)
|
||||||
}
|
}
|
||||||
|
|
||||||
fun getTransformation(name: String, parameters: List<String>? = null): AbstractTransformation {
|
fun getTransformation(transformationStep: TransformationStep): AbstractTransformation {
|
||||||
|
val name = transformationStep.name()
|
||||||
|
val parameters = transformationStep.parameters()
|
||||||
return when {
|
return when {
|
||||||
transformations.containsKey(name) -> {
|
transformations.containsKey(name) -> {
|
||||||
if (!parameters.isNullOrEmpty()) {
|
if (parameters.isNotEmpty()) {
|
||||||
throw IllegalArgumentException("'$name' doesn't accept parameters")
|
throw IllegalArgumentException("'$name' doesn't accept parameters")
|
||||||
} else {
|
} else {
|
||||||
transformations[name]!!
|
transformations[name]!!
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
parameterizedTransformation.containsKey(name) -> {
|
parameterizedTransformation.containsKey(name) -> {
|
||||||
if (parameters.isNullOrEmpty()) {
|
if (parameters.isEmpty()) {
|
||||||
throw IllegalArgumentException("'$name' requires parameters")
|
throw IllegalArgumentException("'$name' requires parameters")
|
||||||
} else {
|
} else {
|
||||||
parameterizedTransformation[name]!!
|
parameterizedTransformation[name]!!
|
||||||
@@ -53,4 +56,21 @@ class TransformationRegistry {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs [input] through every entry of [steps] in order, feeding each step's output into the next.
 *
 * Every step is resolved through [getTransformation], which rejects parameters on a plain
 * transformation and missing parameters on a parameterized one.
 *
 * @param input the initial value.
 * @param steps the steps to apply, in list order; an empty list returns [input] unchanged.
 * @return the value produced by the last step.
 * @throws IllegalArgumentException propagated from [getTransformation] when a step cannot be resolved
 * with the parameters it carries.
 * @throws IllegalStateException if the resolved transformation is neither a [Transformation]
 * nor a [ParameterizedTransformation].
 */
fun applyTransformations(input: String, steps: List<TransformationStep>): String {
    return steps.fold(input) { current, step ->
        // Apply the instance getTransformation already resolved and validated, rather than
        // looking the same name up in the registry maps a second time.
        when (val transformation = getTransformation(step)) {
            is Transformation -> transformation.apply(current)
            is ParameterizedTransformation -> transformation.apply(current, step.parameters())
            else -> throw IllegalStateException("Invalid transformation type")
        }
    }
}
|
||||||
|
|
||||||
}
|
}
|
||||||
73
src/main/kotlin/com/rak/service/ExtractionService.kt
Normal file
73
src/main/kotlin/com/rak/service/ExtractionService.kt
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.config.Step
|
||||||
|
import com.rak.model.RegionalSet
|
||||||
|
import com.rak.model.transform.TransformationRegistry
|
||||||
|
import com.rak.util.XPathUtil
|
||||||
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
import org.jsoup.nodes.Document
|
||||||
|
import org.jsoup.nodes.Element
|
||||||
|
|
||||||
|
/**
 * Extracts [RegionalSet] entries from an already-loaded [Document], using the selectors
 * configured for a provider.
 */
@ApplicationScoped
class ExtractionService(
    private val sourceService: SourceService,
) {

    private val transformationRegistry: TransformationRegistry = TransformationRegistry()

    /**
     * Builds one [RegionalSet] per element matched by the provider's regional-set root selector.
     *
     * The id is passed through the id selector's transformation steps when any are configured;
     * language and key are returned exactly as extracted.
     *
     * @param document the parsed page to read from.
     * @param provider id of the source whose selector configuration is used.
     * @return the extracted sets, in the order their root elements were matched.
     * @throws IllegalArgumentException if no source is registered under [provider].
     * @throws IllegalStateException if id, language or key cannot be extracted for a matched root.
     */
    fun extractSet(document: Document, provider: String): List<RegionalSet> {
        val source = sourceService.getSourceById(provider)
            ?: throw IllegalArgumentException("Provider $provider not found")
        // NOTE(review): get() fails if the source defines no regional-set selector — confirm that
        // every source used with this method configures one.
        val regionalSetSelector = source.getItems().regionalSet().get()

        val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value())

        return regionalSetRoot.map { root ->
            val idSelector = regionalSetSelector.idSelector()
            val rawId: String = extractTextFromRootBySteps(root, idSelector.steps())
                ?: throw IllegalStateException("Parameter 'id' could not be found")

            // transform() is optional: keep the raw id when no transformation steps are configured
            // instead of failing on an empty Optional.
            val setId: String = idSelector.transform()
                .map { transformSteps -> transformationRegistry.applyTransformations(rawId, transformSteps) }
                .orElse(rawId)

            // NOTE(review): transformations configured on the language/key selectors are not
            // applied here — confirm whether that is intended.
            val setLanguage: String = extractTextFromRootBySteps(
                root,
                regionalSetSelector.languageSelector().steps()
            ) ?: throw IllegalStateException("Parameter 'language' could not be found")

            val setKey: String = extractTextFromRootBySteps(
                root,
                regionalSetSelector.regionKeySelector().steps()
            ) ?: throw IllegalStateException("Parameter 'key' could not be found")

            RegionalSet(
                setId,
                setLanguage,
                setKey
            )
        }
    }

    /**
     * Walks [steps] starting from a copy of [root]: every step except the last moves to the next
     * element, and the last step yields the text result.
     *
     * Steps are visited in the iteration order of the set (assumed to be the configured order —
     * TODO confirm against the configuration mapping).
     *
     * @return the value extracted by the last step, or `null` when [steps] is empty or the last
     * step yields nothing.
     * @throws IllegalStateException if an intermediate step does not resolve to an element.
     */
    private fun extractTextFromRootBySteps(
        root: Element,
        steps: Set<Step>
    ): String? {
        val lastIndex = steps.size - 1
        var currentElement: Element = root.clone()

        // Single pass over the set; indexed access on a Set would re-iterate from the start each time.
        for ((index, step) in steps.withIndex()) {
            if (index == lastIndex) {
                return XPathUtil.extractResult(currentElement, step.value())
            }
            currentElement = XPathUtil.getNextElement(currentElement, step.value())
                ?: throw IllegalStateException("No element matched step '${step.value()}'")
        }

        return null
    }

}
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
package com.rak.service
|
|
||||||
|
|
||||||
import jakarta.ws.rs.GET
|
|
||||||
import jakarta.ws.rs.Path
|
|
||||||
import jakarta.ws.rs.QueryParam
|
|
||||||
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient
|
|
||||||
|
|
||||||
/**
|
|
||||||
* To use it via injection.
|
|
||||||
*
|
|
||||||
* ```kotlin
|
|
||||||
* @Inject
|
|
||||||
* @RestClient
|
|
||||||
* lateinit var myRemoteService: MyRemoteService
|
|
||||||
*
|
|
||||||
* fun doSomething() {
|
|
||||||
* val restClientExtensions = myRemoteService.getExtensionsById("io.quarkus:quarkus-rest-client")
|
|
||||||
* }
|
|
||||||
* ```
|
|
||||||
*/
|
|
||||||
@RegisterRestClient(baseUri = "https://stage.code.quarkus.io/api")
|
|
||||||
interface MyRemoteService {
|
|
||||||
|
|
||||||
@GET
|
|
||||||
@Path("/extensions")
|
|
||||||
fun getExtensionsById(@QueryParam("id") id: String): Set<Extension>
|
|
||||||
|
|
||||||
data class Extension(val id: String, val name: String, val shortName: String, val keywords: List<String>)
|
|
||||||
}
|
|
||||||
@@ -1,86 +1,30 @@
|
|||||||
package com.rak.service
|
package com.rak.service
|
||||||
|
|
||||||
import com.rak.config.RegionalSetDefinition
|
import com.rak.model.RegionalSet
|
||||||
import com.rak.config.SourcesConfiguration
|
|
||||||
import com.rak.config.Step
|
|
||||||
import com.rak.util.XPathUtil
|
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
import org.jsoup.Jsoup
|
import org.jsoup.Jsoup
|
||||||
import org.jsoup.nodes.Document
|
import org.jsoup.nodes.Document
|
||||||
import org.jsoup.nodes.Element
|
|
||||||
|
|
||||||
@ApplicationScoped
|
@ApplicationScoped
|
||||||
class ScrapeService(
|
class ScrapeService(
|
||||||
private val sourceService: SourceService
|
private val sourceService: SourceService,
|
||||||
|
private val extractionService: ExtractionService,
|
||||||
) {
|
) {
|
||||||
|
|
||||||
companion object {
|
|
||||||
private val TEXT_NODE_MATCHER: Regex = Regex("text\\(\\)$")
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun extractTextFromRootBySteps(
|
fun scrapeSet(
|
||||||
root: Element,
|
|
||||||
steps: Set<Step>
|
|
||||||
): String? {
|
|
||||||
var currentElement: Element? = root.clone()
|
|
||||||
var result: String? = null
|
|
||||||
|
|
||||||
for (index in 0 until steps.size) {
|
|
||||||
val currentStep = steps.elementAtOrNull(index) ?: return null
|
|
||||||
if (currentElement == null) {
|
|
||||||
throw IllegalStateException()
|
|
||||||
}
|
|
||||||
|
|
||||||
if (index == steps.size - 1) {
|
|
||||||
result = XPathUtil.extractResult(currentElement, currentStep.value())
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
currentElement = XPathUtil.getNextElement(currentElement, currentStep.value())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
fun extractSet(
|
|
||||||
provider: String,
|
provider: String,
|
||||||
setName: String,
|
setName: String,
|
||||||
): List<Map<String, String>> {
|
): List<RegionalSet> {
|
||||||
val source =
|
val source = sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
||||||
sourceService.getSourceById(provider) ?: throw IllegalArgumentException("Provider $provider not found")
|
|
||||||
|
|
||||||
|
|
||||||
val path: String = normalizePath(setName)
|
val path: String = normalizePath(setName)
|
||||||
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
val document: Document = Jsoup.connect("https://${source.getDomain()}/$path").get()
|
||||||
val regionalSetSelector = source.getItems().regionalSet().get()
|
|
||||||
|
|
||||||
val regionalSetRoot = document.select(regionalSetSelector.rootSelector().value())
|
return extractionService.extractSet(document, provider)
|
||||||
|
|
||||||
return regionalSetRoot.map {
|
|
||||||
val setId: String? = extractTextFromRootBySteps(
|
|
||||||
it,
|
|
||||||
regionalSetSelector.idSelector().steps()
|
|
||||||
)
|
|
||||||
val setLanguage: String? = extractTextFromRootBySteps(
|
|
||||||
it,
|
|
||||||
regionalSetSelector.languageSelector().steps()
|
|
||||||
)
|
|
||||||
val setKey: String? = extractTextFromRootBySteps(
|
|
||||||
it,
|
|
||||||
regionalSetSelector.regionKeySelector().steps()
|
|
||||||
)
|
|
||||||
|
|
||||||
mapOf(
|
|
||||||
Pair("id", setId ?: "N/A"),
|
|
||||||
Pair("language", setLanguage ?: "N/A"),
|
|
||||||
Pair("key", setKey ?: "N/A"),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun scrapeCard(
|
||||||
fun extractCard(
|
|
||||||
provider: String,
|
provider: String,
|
||||||
cardName: String,
|
cardName: String,
|
||||||
): Map<String, String> {
|
): Map<String, String> {
|
||||||
|
|||||||
13
src/main/kotlin/com/rak/service/TransformService.kt
Normal file
13
src/main/kotlin/com/rak/service/TransformService.kt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
package com.rak.service
|
||||||
|
|
||||||
|
import com.rak.model.transform.TransformationRegistry
|
||||||
|
import jakarta.enterprise.context.ApplicationScoped
|
||||||
|
|
||||||
|
/**
 * Application-scoped service that holds a [TransformationRegistry].
 *
 * A registry may be passed in by the caller; when none is given, a new [TransformationRegistry]
 * is created. The service does not expose any operations yet.
 */
@ApplicationScoped
class TransformService(private val transformationRegistry: TransformationRegistry = TransformationRegistry())
|
||||||
Reference in New Issue
Block a user